set the text of field ("cuttingindex" & world) to EMPTY
set the text of field "BatchErrors" to EMPTY
BatchParse(world, startnum)
end
on ProcessPerson fname
  -- Parse one person's script file (fname & ".SCR") and then index their
  -- text cuttings file (fname & ".TXT").
  --
  -- If either step reports -1 the user is alerted, the cursor is reset,
  -- playback jumps via go(2) and abort() is called, so the indexing step
  -- is never attempted after a failed parse.
  cursor(4)

  -- Work out which step (if any) failed; EMPTY means both succeeded.
  set problem to EMPTY
  if ParseFile(fname & ".SCR") = -1 then
    set problem to "problem with script file"
  else
    if MakeIndex(fname & ".TXT") = -1 then
      set problem to "problem with text cuttings file"
    end if
  end if

  if problem <> EMPTY then
    alert(problem)
    cursor(0)
    go(2)
    abort()
  end if

  cursor(0)
end
on BatchParse world, startnum
  -- Batch-process every file named in fields "filenames" and "filenames2".
  --
  -- world    : folder name appended to the pathName to build the global mpath
  -- startnum : 1-based position in the combined file list to start from
  --
  -- Each file is run through ParseFile (".scr") and then MakeIndex (".TXT").
  -- Every step that returns -1 adds the file name to an error list, so a
  -- file whose two steps both fail is listed twice. The error summary is
  -- written to the message window and appended to field "BatchErrors".
  global gMakerName, mpath, gMakerNum
  set mpath to the pathName & world & ":"

  -- Collect the non-blank lines of both fields, "filenames" first.
  set batchFiles to []
  set fieldNames to ["filenames", "filenames2"]
  repeat with f = 1 to count(fieldNames)
    set fldName to getAt(fieldNames, f)
    set lineTotal to the number of lines in field fldName
    repeat with k = 1 to lineTotal
      set candidate to line k of field fldName
      if notEmpty(candidate) then
        add(batchFiles, candidate)
      end if
    end repeat
  end repeat

  -- Parse and index each file from startnum onwards.
  set failures to []
  set lastIndex to count(batchFiles)
  repeat with idx = startnum to lastIndex
    set fname to getAt(batchFiles, idx)
    put "batch parsing " && fname
    set gMakerNum to NameToNum(fname)
    set gMakerName to fname
    if ParseFile(fname & ".scr") = -1 then
      add(failures, fname)
    end if
    -- Keep only the part of the name before the first "." for the index
    -- step, then put the item delimiter back to a comma.
    set the itemDelimiter to "."
    set fname to item 1 of fname
    set the itemDelimiter to ","
    if MakeIndex(fname & ".TXT") = -1 then
      add(failures, fname)
    end if
  end repeat

  -- Report: one header line followed by one numbered line per failure.
  set failCount to count(failures)
  set headerText to "Errors in " && failCount && "script files:"
  put headerText
  set nextLine to the number of lines in field "BatchErrors" + 1
  put headerText into line nextLine of field "BatchErrors"
  repeat with idx = 1 to failCount
    set reportText to idx & ":" && getAt(failures, idx)
    put reportText
    set nextLine to the number of lines in field "BatchErrors" + 1
    put reportText into line nextLine of field "BatchErrors"
  end repeat
end
on ParseFile name
  -- Read a script file into the global txt, parse it and save the result.
  --
  -- name : file name, resolved relative to the global mpath. When the
  --        handler is called with no parameters, FileIO is opened with
  --        "?read" / "TEXT" instead (presumably a file-selection dialog
  --        for text files -- confirm against the FileIO XObject docs).
  --
  -- Returns 0 on success and -1 on any failure. Callers compare the
  -- result with -1, so the open-failure path must return -1 explicitly:
  -- a bare exit would hand back VOID and be mistaken for success.
  global fileObj, txt, mpath

  if the paramCount = 0 then
    set fileObj to FileIO(mnew, "?read", "TEXT")
  else
    set fileObj to FileIO(mnew, "read", mpath & name)
  end if

  -- FileIO did not hand back an object, so there is nothing to read.
  if not objectp(fileObj) then
    alert("error opening file" && name)
    return -1
  end if

  -- Read the whole file, then release the FileIO instance before parsing.
  set txt to fileObj(mReadFile)
  fileObj(mdispose)

  if Parse() = -1 then
    return -1
  end if

  put "saving..."
  saveNewParse()
  return 0
end
on Parse
-- Walks the global txt line by line and fills the globals gChapters,
-- gArticles, gCaptions, gTitles, gDates, gName, gnuggets and
-- gTotalChapters. Returns 0 on success and -1 when the text does not have
-- the expected structure (a diagnostic is put to the message window first).
--
-- The loop counter num is advanced by hand inside the loop body whenever a
-- construct spans several lines, so statement order matters throughout.
global txt, gChapters, gArticles, gCaptions, gTitles, gDates, gName, gTotalChapters, gnuggets
-- debug is hard-wired off; when on, a trace is appended to field "parsed".
set debug to 0
if debug then
set the text of field "parsed" to EMPTY
end if
-- Reset the global results and the per-chapter accumulators (tmp*).
set gChapters to []
set gArticles to []
set tmplist to []
set gCaptions to EMPTY
set gTitles to EMPTY
set tmptitles to EMPTY
set tmpcaptions to EMPTY
set gDates to []
set tmpdates to []
set gnuggets to EMPTY
-- captionText is assigned here but not used again in this handler.
set captionText to EMPTY
set gName to EMPTY
set num to 1
set totalnum to the number of lines in txt
-- chap: current chapter number (0 = still before the first "1" line).
-- inNuggets: set to 1 once the "nuggets" heading has been seen.
-- article: running article number inside the current chapter.
set chap to 0
set inNuggets to 0
set article to 0
repeat with num = 1 to totalnum
set ln to line num of txt
-- Lines that notEmpty rejects are skipped.
if not notEmpty(ln) then
next repeat
end if
-- Ignore everything until the first line that starts with "1".
if chap = 0 then
if char 1 of ln <> "1" then
next repeat
else
set chap to 1
end if
end if
-- In chapter 1, a line containing "Name" means the last word of the
-- following line is taken as gName. Processing of ln then continues below.
if chap = 1 then
if ln contains "Name" then
set num to num + 1
set gName to word the number of words in line num of txt of line num of txt
end if
end if
-- A line whose first character is not the current chapter number is
-- treated as the heading of the next chapter.
-- NOTE(review): only char 1 is compared with string(chap), so this looks
-- like it assumes single-digit chapter numbers -- confirm.
if char 1 of ln <> string(chap) then
-- No further heading is accepted once the nuggets section has started.
if inNuggets then
put "error in the nuggets."
return -1
exit
end if
if debug then
put "------------------------------------------" into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set chap to chap + 1
-- Once the incremented counter exceeds 2, flush the accumulators of the
-- chapter that just ended. Chapters are separated by "@" in gCaptions and
-- gTitles; the first flush writes a leading "@" before the data.
-- NOTE(review): the flush only happens on a heading, so whatever was
-- collected after the final heading is not flushed by this handler.
if chap > 2 then
add(gArticles, tmplist)
set the itemDelimiter to "@"
set capnum to the number of items in gCaptions
if gCaptions = EMPTY then
set gCaptions to "@" & tmpcaptions
else
put tmpcaptions into item capnum + 1 of gCaptions
end if
set the itemDelimiter to ","
set the itemDelimiter to "@"
set titnum to the number of items in gTitles
if gTitles = EMPTY then
set gTitles to "@" & tmptitles
else
put tmptitles into item titnum + 1 of gTitles
end if
set the itemDelimiter to ","
add(gDates, tmpdates)
end if
-- The heading is recorded as a chapter unless it is the "nuggets" heading.
if not (ln contains "nuggets") then
add(gChapters, ln)
else
set inNuggets to 1
end if
-- Start fresh accumulators for the new chapter.
-- NOTE(review): tmptitles is reset to a list here although it starts as
-- EMPTY and is later written with "put ... into item" -- confirm intent.
set tmplist to []
set tmptitles to []
set tmpcaptions to EMPTY
set tmpdates to []
if debug then
put "Chapter:" && ln into line the number of lines in field "parsed" + 1 of field "parsed"
end if
-- Move to the next non-empty line and read the chapter number from the
-- first character of its first word (character code minus 48).
-- NOTE(review): this skip has no upper bound on num; if the heading is
-- the last non-empty line it looks like it would never terminate.
set num to num + 1
repeat while not notEmpty(line num of txt)
set num to num + 1
end repeat
set oldchap to chap
set chap to charToNum(char 1 of word 1 of line num of txt) - 48
-- Outside the nuggets section, reject a chapter number that jumps by more
-- than 5, goes backwards or is negative.
if not inNuggets and (((chap - oldchap) > 5) or (oldchap > chap) or (chap < 0)) then
put "error in " && oldchap & "." & article
return -1
exit
end if
put gName && "chap = " && chap
-- Step back one line so the loop increment lands on the line just read.
set num to num - 1
set article to 0
next repeat
end if
-- Nuggets section: drop the first word and append the rest of the line to
-- gnuggets as a "#"-delimited item. num is bumped, so the following line
-- is skipped by the loop.
if inNuggets = 1 then
set article to article + 1
delete word 1 of ln
set the itemDelimiter to "#"
set tmpnugnum to the number of items in gnuggets
if gnuggets = EMPTY then
set gnuggets to ln
else
put ln into item tmpnugnum + 1 of gnuggets
end if
set the itemDelimiter to ","
set num to num + 1
if debug then
put chap & "." & article && ln && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
next repeat
end if
-- The media branches below share one shape: count the article, record its
-- type, call SkipToNumber (its -1 result is propagated), require the line
-- it returns to contain "caption", then store the line after that as item
-- number 'article' of tmpcaptions ("#"-delimited).
-- NOTE(review): add() is called with three arguments in these branches but
-- with two elsewhere in this handler -- confirm what the extra argument does.
--
-- Sound article: keywords "audio", "sound" or "bbc".
if (ln contains "audio") or (ln contains "sound") or (ln contains "bbc") then
set article to article + 1
add(tmplist, article, "sound")
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
if debug then
put chap & "." & article && "Sound:" into line the number of lines in field "parsed" + 1 of field "parsed"
end if
if not (line num of txt contains "caption") then
put "error: " && chap & "." & article && "should have a caption"
return -1
exit
end if
set num to num + 1
if debug then
put chap & "." & article & " Caption:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set the itemDelimiter to "#"
put line num of txt into item article of tmpcaptions
set the itemDelimiter to ","
next repeat
end if
-- Picture article: keywords "pic" or "photo".
if (ln contains "pic") or (ln contains "photo") then
set article to article + 1
add(tmplist, article, "pic")
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
if debug then
put chap & "." & article && "Pic:" into line the number of lines in field "parsed" + 1 of field "parsed"
end if
if not (line num of txt contains "caption") then
put "error: " && chap & "." & article && "should have a caption"
return -1
exit
end if
set num to num + 1
if debug then
put chap & "." & article & " Caption:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set the itemDelimiter to "#"
put line num of txt into item article of tmpcaptions
set the itemDelimiter to ","
next repeat
end if
-- Moving-image article: "graphic" or "animation" is recorded as
-- "animation"; the other keywords are recorded as "movie".
if (ln contains "movie") or (ln contains "video") or (ln contains "graphic") or (ln contains "footage") or (ln contains "film") or (ln contains "animation") then
set article to article + 1
if (ln contains "graphic") or (ln contains "animation") then
add(tmplist, article, "animation")
else
add(tmplist, article, "movie")
end if
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
if debug then
put chap & "." & article && "Movie:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
if not (line num of txt contains "caption") then
put "error: " && chap & "." & article && "should have a caption"
return -1
exit
end if
set num to num + 1
if debug then
put chap & "." & article & " Caption:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set the itemDelimiter to "#"
put line num of txt into item article of tmpcaptions
set the itemDelimiter to ","
next repeat
end if
-- Partwork article (only after chapter 1): as above, plus a " "
-- placeholder is added to tmpdates.
if (ln contains "partwork") and (chap > 1) then
set article to article + 1
add(tmplist, article, "partwork")
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
if debug then
put chap & "." & article && "Partwork2:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
if not (line num of txt contains "caption") then
put "error: " && chap & "." & article && "should have a caption"
return -1
exit
end if
set num to num + 1
if debug then
put chap & "." & article & " Caption:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set the itemDelimiter to "#"
put line num of txt into item article of tmpcaptions
set the itemDelimiter to ","
add(tmpdates, article, " ")
next repeat
end if
-- Cutting article: three SkipToNumber steps give headline, date, caption.
if ln contains "cutting" then
set article to article + 1
add(tmplist, article, "cutting")
if debug then
put chap & "." & article && "Cutting:" into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
-- Headline: use the following line when it is non-empty; otherwise use
-- the current line with its first word removed. The delete edits the
-- global txt in place.
if notEmpty(line num + 1 of txt) then
set num to num + 1
else
delete word 1 of line num of txt
end if
if debug then
put chap & "." & article && "Headline:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set the itemDelimiter to "#"
put line num of txt into item article of tmptitles
set the itemDelimiter to ","
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
-- Date: same next-line-or-strip-first-word rule as the headline.
if notEmpty(line num + 1 of txt) then
set num to num + 1
else
delete word 1 of line num of txt
end if
if debug then
put chap & "." & article && "Date:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
add(tmpdates, article, line num of txt)
set num to num + 1
set num to SkipToNumber(txt, chap, article, num)
if num = -1 then
return -1
exit
end if
-- Caption: the marker line must contain "caption"; the text is the line
-- after it.
if not (line num of txt contains "caption") then
put "error: " && chap & "." & article && "should have a caption"
return -1
exit
end if
set num to num + 1
if debug then
put chap & "." & article && "Caption:" && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
set the itemDelimiter to "#"
put line num of txt into item article of tmpcaptions
set the itemDelimiter to ","
next repeat
end if
-- No keyword matched: store the line, minus its first word, as the
-- article entry itself and skip the following line.
set article to article + 1
delete word 1 of ln
add(tmplist, ln)
set num to num + 1
if debug then
put chap & "." & article && ln && line num of txt into line the number of lines in field "parsed" + 1 of field "parsed"
end if
end repeat
-- Prepend a fixed "INTRO" chapter with a single "Partwork" article and an
-- empty date list, then publish the chapter count.
addAt(gChapters, 1, "INTRO")
addAt(gArticles, 1, ["Partwork"])
addAt(gDates, 1, [])
set gTotalChapters to count(gChapters)
return 0
end
on notEmpty str
  -- Decide whether str counts as a non-blank line.
  --
  -- Returns 1 when str is longer than 5 characters, or when at least one
  -- of its characters appears in the set of recognised characters below;
  -- returns 0 otherwise (including for an empty string).
  set recognised to "abcdefghijklmnopqrstuvwxyz1234567890!@$%^&*()-=_+{}[];:'\|,.<>/?`~"
  set strLen to the number of chars in str
  set verdict to 0

  if strLen > 5 then
    -- Long strings are accepted without inspecting their characters.
    set verdict to 1
  else
    -- Scan left to right and stop at the first recognised character.
    set pos to 1
    repeat while (pos <= strLen) and (verdict = 0)
      if recognised contains char pos of str then
        set verdict to 1
      end if
      set pos to pos + 1
    end repeat
  end if

  return verdict
end
on SkipToNumber txt, chap, art, num
set start to num
repeat while char 1 of word 1 of line num of txt <> string(chap)